# 导入数据请求模块
import requests
# 导入数据解析模块
import parsel
# 导入正则表达式模块
import re
# 导入csv
import csv

# Scrape second-hand housing listings from Lianjia (Changsha) result pages
# 1-9 and write one CSV row per listing to ./data/二手房.csv.

# Placeholder written to the CSV when a field cannot be determined.
UNKNOWN = '未知'

# Browser-like request headers.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36'
}

# Use a context manager so the output file is flushed and closed even when
# the run is interrupted or an unexpected error propagates.
with open('./data/二手房.csv', 'w', encoding='utf-8', newline='') as f:
    writer = csv.DictWriter(f, fieldnames=[
        '标题', '小区', '区域', '总价', '单价', '户型', '面积', '朝向', '装修', '楼层', '层数', '建筑结构', '年份'])
    writer.writeheader()

    for page in range(1, 10):
        print(f'==================正在采集第{page}页数据==================')
        url = f'https://cs.lianjia.com/ershoufang/pg{page}'

        # Only network/HTTP failures are tolerated here; the page is skipped
        # with a visible message instead of being silently swallowed.
        try:
            # A timeout keeps a stalled connection from hanging the script.
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
        except requests.RequestException as exc:
            print(f'第{page}页请求失败，已跳过：{exc}')
            continue

        response.encoding = 'utf-8'
        # Turn the HTML text into a selectable document.
        selector = parsel.Selector(response.text)
        # One `.info` element per listing on the page.
        divs = selector.css('.sellListContent li .info')

        for div in divs:
            # Parse each listing independently so that one malformed entry
            # does not discard the remaining listings on the page.
            try:
                title = div.css('.title a::text').get()
                area_list = div.css('.positionInfo a::text').getall()
                area = area_list[0]  # residential community
                area_1 = area_list[1]  # district
                total_price = div.css('.totalPrice span::text').get()
                unit_price = div.css('.unitPrice span::text').get().replace('元/平', '')
                # '|'-separated attributes; fields are kept exactly as split.
                house_info = div.css('.houseInfo::text').get().split('|')
                house_type = house_info[0]
                house_area = house_info[1]
                face = house_info[2]  # orientation
                decorate = house_info[3]  # decoration
                floor = house_info[4]  # floor description
                building = house_info[-1]  # building structure
            except (AttributeError, IndexError) as exc:
                # AttributeError: an expected node was missing (.get() -> None).
                # IndexError: fewer fields than expected.
                print(f'第{page}页有一条房源解析失败，已跳过：{exc!r}')
                continue

            # First run of digits in the floor description; fall back to the
            # placeholder when the text contains no number.
            floor_match = re.search(r'\d+', floor)
            floor_num = floor_match.group() if floor_match else UNKNOWN

            # The build year is only present when there are exactly 7 fields.
            date = house_info[5] if len(house_info) == 7 else UNKNOWN

            row = {
                '标题': title,
                '小区': area,
                '区域': area_1,
                '总价': total_price,
                '单价': unit_price,
                '户型': house_type,
                '面积': house_area,
                '朝向': face,
                '装修': decorate,
                '楼层': floor,
                '层数': floor_num,
                '建筑结构': building,
                '年份': date,
            }
            writer.writerow(row)
            print(row)
